package io.github.mosiki.modules.spider.service;

import com.alibaba.fastjson.JSONObject;
import io.github.mosiki.common.enums.ResultEnum;
import io.github.mosiki.common.exception.MagicxException;
import io.github.mosiki.common.utils.RandomUtil;
import io.github.mosiki.modules.spider.config.SpiderConfig;
import io.github.mosiki.modules.spider.config.WebMagicConfig;
import io.github.mosiki.modules.spider.dto.SpiderTaskDTO;
import io.github.mosiki.modules.spider.monitor.MySpiderMonitor;
import io.github.mosiki.modules.spider.monitor.MySpiderStatus;
import io.github.mosiki.modules.spider.processor.AbstractProcessor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.ApplicationContext;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.stereotype.Service;
import redis.clients.jedis.JedisPool;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.processor.example.BaiduBaikePageProcessor;
import us.codecraft.webmagic.scheduler.RedisScheduler;

import javax.annotation.Resource;
import javax.management.JMException;
import java.util.List;
import java.util.Map;

@Service
@Slf4j
public class WebMagicService {

    @Resource
    private ApplicationContext context;
    @Resource
    private SpiderTaskService taskService;
    @Resource
    private JedisPool jedisPool;

    private Spider initSpider(SpiderTaskDTO taskDTO, WebMagicConfig config) {
        MySpiderMonitor spiderMonitor = MySpiderMonitor.instance();

        SpiderConfig spiderConfig = config.getSpider();
        AbstractProcessor pageProcess = context.getBean(spiderConfig.getProcesser(), AbstractProcessor.class);

        pageProcess.init(config);
        pageProcess.setUuid(taskDTO.getSpriderUuid());

        Spider spider = Spider.create(pageProcess).thread(spiderConfig.getThread());
        spider.setUUID(taskDTO.getSpriderUuid());

        List<String> pipelines = spiderConfig.getPipeline();
        for (String pipeline : pipelines) {
            Pipeline bean = context.getBean(pipeline, Pipeline.class);
            spider.addPipeline(bean);
        }
        // 设置Downloader
        // 设置Scheduler
        spider.setScheduler(new RedisScheduler(jedisPool));
        // 注册爬虫
        try {
            spiderMonitor.register(spider);
        } catch (JMException e) {
            log.error("爬虫启动失败：{}", e.getMessage());
            throw new MagicxException(ResultEnum.TASK_START_FAIL);
        }

        return spider;
    }

    public void run(SpiderTaskDTO taskDTO, boolean runAsync) {
        String ruleJson = taskDTO.getTaskRuleJson();
        WebMagicConfig config = JSONObject.parseObject(ruleJson, WebMagicConfig.class);
        Spider spider = this.initSpider(taskDTO, config);
        spider.addUrl(config.getSpider().getStartUrl());

        if (runAsync) {
            spider.runAsync();
        } else {
            spider.run();
        }
    }

    // 提供分布式支持
    public void runBreak(SpiderTaskDTO taskDTO, boolean runAsync) {
        // uuid 与需要分布式爬取的设置一致
        String ruleJson = taskDTO.getTaskRuleJson();
        WebMagicConfig config = JSONObject.parseObject(ruleJson, WebMagicConfig.class);
        Spider spider = this.initSpider(taskDTO, config);

        RedisScheduler redisScheduler = new RedisScheduler(jedisPool);
        Request request = redisScheduler.poll(spider);
        if (request == null) {
            throw new MagicxException(ResultEnum.TASK_FINISHED);
        }
        spider.addRequest(request);
        if (runAsync) {
            spider.runAsync();
        } else {
            spider.run();
        }
    }

    public void stop(MySpiderStatus spiderStatus) {
        spiderStatus.stop();
        spiderStatus.getSpider().close();
    }
}
